#!/bin/tcsh -ef
#############################################################################
#                                                                           #
# Title: Pick & export particles                               				#
#                                                                           #
# (C) 2dx.org, GNU Public License.                                         #
#                                                                           #
# Created..........: 29/07/2016                                             #
# Last Modification: 04/03/2018                                             #
# Author...........: focus-em.org                                           #
#                                                                           #
#############################################################################
#
# SORTORDER: 2
#
#
# MANUAL: This script picks particles from the 2D crystal images, extracts and exports them to be refined in single-particle fashion. Supports FREALIGN (.par) and RELION (.star) meta-data formats.
#
# DISPLAY: Thread_Number
# DISPLAY: SPR_DIR
# DISPLAY: SPR_sample_pixel
# DISPLAY: SPR_doresample
# DISPLAY: SPR_boxsize
# DISPLAY: SPR_PhaseShift
# DISPLAY: SPR_SIGCC
# DISPLAY: SPR_InvertContrast
# DISPLAY: SPR_NormalizeBox
# DISPLAY: SPR_SigmaNorm
# DISPLAY: SPR_RadNorm
# DISPLAY: SPR_WhichCTF
# DISPLAY: SPR_WhichHalfSet
# DISPLAY: SPR_CalculateDefocusTilted
# DISPLAY: SPR_SavePhaseFlipped
# DISPLAY: SPR_SaveCTFMultiplied
# DISPLAY: SPR_SaveWienerFiltered
# DISPLAY: SPR_WienerConstant
# DISPLAY: SPR_WhichTiltGeometry
# DISPLAY: CS
# DISPLAY: KV
# DISPLAY: sample_pixel
# DISPLAY: phacon
# DISPLAY: magnification
# DISPLAY: SPR_SYM
# DISPLAY: SPR_SavePickFig
# DISPLAY: SPR_Ignore_Lat2
# DISPLAY: SPR_Ignore_C1
# DISPLAY: SPR_CreateStarFile
#
#$end_local_vars
#

echo "<<@progress: 0>>"

set Thread_Number = ""
set bin_2dx = ""
set proc_2dx = ""

set scriptname = PickExportParticleStack

#
set SPR_sample_pixel = ""
set SPR_doresample = ""
set SPR_RESMIN = ""
set SPR_RESMAX = ""
set SPR_PhaseShift = ""
set SPR_boxsize = ""
set SPR_SIGCC = ""
set SPR_InvertContrast = ""
set SPR_NormalizeBox = ""
set SPR_CalculateDefocusTilted = ""
set SPR_SavePhaseFlipped = ""
set SPR_SaveCTFMultiplied = ""
set SPR_SaveWienerFiltered = ""
set SPR_WienerConstant = ""
set SPR_SigmaNorm = ""
set SPR_RadNorm = ""
set SPR_WhichTiltGeometry = ""
set SPR_WhichCTF = ""
set SPR_WhichHalfSet = ""
set SPR_SavePickFig = ""
set CS = ""
set KV = ""
set sample_pixel = ""
set phacon = ""
set magnification = ""
set use_masked_image = "n"
set SPR_Ignore_Lat2 = ""
set SPR_Ignore_C1 = ""
set SPR_CreateStarFile  = ""
#
set SPR_DIR = ""
set SPR_IMGS_DIR = ../
set SPR_MERGEFILE = 2dx_merge_dirfile.dat
set SPR_STACKS_DIR = ${SPR_DIR}/stacks/
set SPR_PICKING_DIR = ${SPR_DIR}/picking/
set SPR_STACK_ROOTNAME = 'particles'


#$end_vars

# Calculate amplitude contrast from phase contrast
set ampcon=`echo "sqrt(1.0 - ${phacon}^2)" | bc`
set ampcon = `printf "%0.2f\n" ${ampcon}`

set ccp4_setup = 'y'
source ${proc_2dx}/initialize
#
if ( -e LOGS/${scriptname}.results ) then
	\mv LOGS/${scriptname}.results LOGS/${scriptname}.results.old
endif

# Start script commands:
mkdir -p ${SPR_STACKS_DIR}

if ( ${SPR_SavePickFig} == "y" ) then
	mkdir -p ${SPR_PICKING_DIR}
endif

if ( ${SPR_WhichCTF} == "Micrograph" ) then
	set SPR_SaveWienerFiltered = "n"
	set SPR_SavePhaseFlipped = "n"
endif
# The desired pixel size for SPR processing may be different from the one used for 2DX processing:
if ( ${SPR_sample_pixel} == "" ) then
	set SPR_sample_pixel = ${sample_pixel}
endif
# There may be some duplicate images in the dataset, by accident. The following script will remove duplicate entries based on the images names:
# ${app_python} ${proc_2dx}/SPR_RemoveDuplicateEntries.py ${SPR_MERGEFILE} ${SPR_DIR}/2dx_merge_dirfile-unique.dat ${SPR_Ignore_Lat2} ${SPR_Ignore_C1}
\cp ${SPR_MERGEFILE} ${SPR_DIR}/2dx_merge_dirfile-unique.dat
if ( ${SPR_Ignore_Lat2} == "y" ) then
	sed '/_lat2/d' ${SPR_DIR}/2dx_merge_dirfile-unique.dat > ${SPR_DIR}/2dx_merge_dirfile-unique.dat.tmp
	\mv ${SPR_DIR}/2dx_merge_dirfile-unique.dat.tmp ${SPR_DIR}/2dx_merge_dirfile-unique.dat
endif
if ( ${SPR_Ignore_C1} == "y" ) then
	sed '/_c1/d' ${SPR_DIR}/2dx_merge_dirfile-unique.dat > ${SPR_DIR}/2dx_merge_dirfile-unique.dat.tmp
	\mv ${SPR_DIR}/2dx_merge_dirfile-unique.dat.tmp ${SPR_DIR}/2dx_merge_dirfile-unique.dat
endif

# Let's make sure we don't use more threads than there are images to pick particles from:
set n_img = `cat ${SPR_DIR}/2dx_merge_dirfile-unique.dat | wc -l`
if ( ${n_img} < ${Thread_Number} ) then
	set Thread_Number = ${n_img}
else
# Also ensure that we don't have a wrong number of threads that will break parallelism:
	set load_img=`echo "${n_img}/${Thread_Number}" | bc -l`
	set load_img=`printf %.0f ${load_img}`
	while ( `echo "(${Thread_Number}-1)*${load_img} >= ${n_img}" | bc -l` )
		@ Thread_Number--
		set load_img=`echo "${n_img}/${Thread_Number}" | bc -l`
		set load_img=`printf %.0f ${load_img}`
		# echo ${load_img}
	end
endif
echo "::Will use ${Thread_Number} threads for optimal load balancing."

set t = 1
set pids = ""
while ( $t <= ${Thread_Number} )
	${app_python} ${proc_2dx}/SPR_ExtractParticles.py ${SPR_IMGS_DIR} ${SPR_DIR}/2dx_merge_dirfile-unique.dat ${SPR_PICKING_DIR} ${SPR_STACKS_DIR} ${SPR_STACK_ROOTNAME} ${SPR_boxsize} ${SPR_PhaseShift} ${SPR_sample_pixel} ${KV} ${CS} ${phacon} ${magnification} ${SPR_SIGCC} ${SPR_InvertContrast} ${SPR_NormalizeBox} ${SPR_CalculateDefocusTilted} ${SPR_SavePhaseFlipped} ${SPR_SaveCTFMultiplied} ${SPR_SaveWienerFiltered} ${SPR_WienerConstant} ${SPR_SigmaNorm} ${SPR_RadNorm} ${SPR_WhichTiltGeometry} ${SPR_WhichCTF} ${SPR_SavePickFig} ${use_masked_image} ${SPR_doresample} ${Thread_Number} ${t} &
	set pids = "$pids $!"
	@ t++
end

# Wait for all jobs to finish
while ( `ps -p ${pids} | wc -l` > 1 )
	wait
end

### BELOW IS A SERIOUS KLUDGE BECAUSE EMAN2/SPARX DO NOT SUPPORT WRITING TO MRC STACKS BIGGER THAN 65535 IMAGES DIRECTLY ###
# We specify each possible stack explicitly to avoid processing others that may exist in the folder:
echo "::Now organizing and converting partial stack(s) to single .mrcs and .par files..."

set stacks = ""
# if ( ${SPR_WhichCTF} == "Micrograph" ) then
# 	set stacks = "$stacks _ctfcor"
# else
	set stacks = "$stacks raw"
	if ( ${SPR_SaveWienerFiltered} == "y" ) then
		set stacks = "$stacks _wiener-filtered"
	endif
	if ( ${SPR_SavePhaseFlipped} == "y" ) then
		set stacks = "$stacks _phase-flipped"
	endif
	if ( ${SPR_SaveCTFMultiplied} == "y" ) then
		set stacks = "$stacks _ctf-multiplied"
	endif
# endif

# foreach s ( raw _ctfcor _phase-flipped _wiener-filtered )
foreach s ( ${stacks} )

	# echo ${s}

	if  ( ${s} == "raw" ) then
		set i = ""
	else
		set i = ${s}
	endif

	# echo ${i}

	if ( -e ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par ) then
		echo "::There is already a ${SPR_STACK_ROOTNAME}${i}_1_r1.par file in ${SPR_STACKS_DIR}! Will back it up."
		\mv ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par.bkp
	endif

	if ( -e ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs ) then
		echo "::There is already a ${SPR_STACK_ROOTNAME}${i}.mrcs file in ${SPR_STACKS_DIR}! Will back it up."
		\mv ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs.bkp
	endif

	if ( -e ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_coordinates_master.txt ) then
		echo "::There is already a ${SPR_STACK_ROOTNAME}${i}_coordinates_master.txt file in ${SPR_STACKS_DIR}! Will back it up."
		\mv ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt ${SPR_STACK_ROOTNAME}_coordinates_master.txt.bkp
	endif

	if ( -e ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}-0001.mrcs ) then

		set t = 1

		# echo "::Creating single ${SPR_STACK_ROOTNAME}${i}.hdf stack..."
		while ( $t <= ${Thread_Number} )

			set num = `printf "%.4d" $t`
			# Append to HDF stack first... the kludge...
			# e2proc2d.py ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}-${num}.hdf ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.hdf
			# \rm ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}-${num}.hdf

			# Append partial .par file to the main one...
			if  ( ${s} == "raw" || ${s} == "_ctfcor" ) then
				cat ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1-${num}.par >> ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par
				\rm ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1-${num}.par
				# Append the partial coordinate files to the main one:
				cat ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master-${num}.txt >> ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt
			else
				# Let's make copies of the base .par file for each type of stack we may have:
				if ( -e ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_1_r1.par ) then
					cp ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_1_r1.par ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par
				else
					cp ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_ctfcor_1_r1.par ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par
				endif
			endif

			# Append partial .mrcs file to the main one...
			if ( ${num} == "0001" ) then
				tail -c +0 ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}-${num}.mrcs >> ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs
			else
				if ( -e ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}-${num}.mrcs ) then
					tail -c +1025 ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}-${num}.mrcs >> ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs
				endif
			endif

			@ t++

		end
		echo "::Cleaning up partial ${SPR_STACK_ROOTNAME}${i} stacks..."
		\rm ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}-*.mrcs

		# The below script call is necessary to ensure correct number of images in the header of the final .mrcs files. If the --stats option makes it too slow, one can (almost always) safely omit it.
		# ${proc_2dx}/MRCheaderUpdate.py ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs --stats
		${app_python} ${proc_2dx}/MRCheaderUpdate.py ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs

		# Correct the particle numbers in the .par file:
		if  ( ${s} == "raw" || ${s} == "_ctfcor" ) then
			awk '{$1=NR; print}' ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par > ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par.tmp
			\mv ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par.tmp ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par

			echo "::Cleaning up partial ${SPR_STACK_ROOTNAME}${i} coordinate files..."
			\rm ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master-*.txt
			# Correct the particle numbers in the master coordinates file:
			awk '{$1=NR; print}' ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt > ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt.old
			\cp ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt.old ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt
		endif

		if ( ${SPR_WhichHalfSet} == "crystal-based" ) then

			echo "::Now splitting ${i} dataset into crystal-based half-sets..."

			\mv ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs.old
			\mv ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par.old

			${app_python} ${proc_2dx}/SPR_SplitHalfSets.py ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par.old ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs.old ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs
			
			\rm ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs.old

			if  ( ${s} == "raw" || ${s} == "_ctfcor" ) then
				# Now exclude the exceeding particles from the coordinates file.
				# This will append 'd' to the line numbers to be deleted and output a valid sed script:
				sed 's%$%d%' ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par.old.excluded.idx > ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par.old.excluded.idx.tmp
				# This deletes the required lines from the coordinates master file and outputs a temporary file:
				sed -f ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par.old.excluded.idx.tmp ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt.old > ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt.tmp
				# Now we correct the line numbering to match the actual dataset in the .par file:
				awk '{$1=NR; print}' ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt.tmp > ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt
				\rm ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt.tmp
				\rm ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_coordinates_master.txt.old

				# And remove the temporary files:
				\rm ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par.old.excluded.idx.tmp
			endif

		endif

		if ( ${SPR_CreateStarFile} == "y" ) then

			echo "::Creating .star file for ${i} stack..."

			if  ( ${s} == "raw" || ${s} == "_ctfcor" ) then

				${app_python} ${proc_2dx}/par2star.py ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.par ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.star --angpix ${sample_pixel} --ampcon ${ampcon} --kv ${KV} --cs ${CS} --stack ${SPR_STACK_ROOTNAME}${i}.mrcs --crystal
			else

				# Let's make copies of the base .star file for each type of stack we may have:
				if ( -e ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_1_r1.star ) then
					cp ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_1_r1.star ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.star
				else
					cp ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}_ctfcor_1_r1.star ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}_1_r1.star
				endif

		endif

	endif

	echo "# IMAGE-IMPORTANT MRCS: ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}${i}.mrcs <STACK: ${SPR_STACK_ROOTNAME}${i}.mrcs>" >> LOGS/${scriptname}.results

	echo "<<@progress: +5>>"

	echo "::Done!"

end
if ( ${SPR_WhichHalfSet} == "crystal-based" ) then
	\rm ${SPR_STACKS_DIR}/${SPR_STACK_ROOTNAME}*_1_r1.par.old
endif

if ( ${SPR_SavePickFig} == "y" ) then

	foreach i (${SPR_PICKING_DIR}/*.png)
		echo "# IMAGE PNG: $i <PNG: `basename $i`>" >> LOGS/${scriptname}.results
	end
	
endif

echo "<<@progress: 100>>"

echo ":: "
echo ":: Done!"
echo ":: "
